#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: copy_zhejiang_company.py
@time: 2018/1/2 10:43
"""

import sys

sys.path.append('..')
sys.path.append('../..')
from common import util

from common.mongo import MongDb
from logger import Logger

# Connection settings for the MongoDB instance used by this script. main()
# both reads from and writes to this same connection.
# NOTE(review): the credentials are committed in plain text -- consider
# loading them from the environment or an untracked config file.
MONGO_DB_SOURCE = {
    'host': '172.16.215.2',
    'port': 40042,
    'db': 'company_data',
    'username': 'work',
    'password': 'haizhi',
}

# Logging module: the logger is built from the file name 'copy_to_crawl.log'.
log = Logger('copy_to_crawl.log').get_logger()

# Module-level database handle, created at import time from the settings
# above and shared by main().
source_db = MongDb(
    MONGO_DB_SOURCE['host'],
    MONGO_DB_SOURCE['port'],
    MONGO_DB_SOURCE['db'],
    MONGO_DB_SOURCE['username'],
    MONGO_DB_SOURCE['password'],
    log=log,
)


def main():
    """Seed the crawl task collection with the ids of the source collection.

    Iterates over ``source_db.traverse_batch_field`` for the collection
    'zhejiang_2017_company' (called with an empty filter and the single
    field '_id').  For every yielded item a task record is built that
    carries the item's ``_id``, ``search_status`` and ``crawl_status`` both
    set to 0, and ``_in_time`` set to ``util.get_now_time()``.

    Records are buffered and handed to ``source_db.insert_batch_data`` for
    the collection 'new_zhejiang_company_list' whenever the buffer reaches
    500 entries; whatever is left after the loop is written in one final
    call.  A progress line is logged after every 1000 processed items and a
    completion line at the end.
    """
    src_collection = 'zhejiang_2017_company'
    dst_collection = 'new_zhejiang_company_list'
    batch_limit = 500      # flush the buffer once it holds this many records
    report_every = 1000    # log progress after this many processed items

    pending = []
    cursor = source_db.traverse_batch_field(src_collection, {}, ['_id'])
    for processed, doc in enumerate(cursor, 1):
        pending.append({
            '_id': doc.get('_id'),
            'search_status': 0,
            'crawl_status': 0,
            '_in_time': util.get_now_time(),
        })

        if len(pending) >= batch_limit:
            source_db.insert_batch_data(dst_collection, pending)
            # Clear in place so the very same list object keeps being reused.
            del pending[:]

        if processed % report_every == 0:
            log.info("当前复制进度: count = {}".format(processed))

    # Write out the last, partially filled batch (if any).
    if pending:
        source_db.insert_batch_data(dst_collection, pending)
        del pending[:]
    log.info("复制完成...")


# Run the copy job only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
